In [14]:
#!pip install numpy
#!pip install pandas
#!pip install matplotlib
#!pip install seaborn
#!pip install plotly
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# for better interactive visualization
import plotly.graph_objects as go
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')
In [15]:
import os
In [17]:
# Names of everything inside the project folder. os.listdir() returns entries
# in arbitrary order, so sort case-insensitively to keep positions stable for
# the later cells that index into `files`.
files=sorted(os.listdir('C:\\Users\\manje\\Downloads\\Projects\\Covid-19'),key=str.lower)

files
Out[17]:
['.ipynb_checkpoints',
 'country_wise_latest.csv',
 'covid_19_clean_complete.csv',
 'Covid_19_project.ipynb',
 'day_wise.csv',
 'full_grouped.csv',
 'usa_country_wise.csv',
 'worldometer_data.csv']
In [11]:
# Helper for loading the CSV datasets, since the same read is repeated for several files.
def read_data(path,filename):
    """Read one CSV file from a directory into a pandas DataFrame.

    Parameters
    ----------
    path : str or os.PathLike
        Directory that contains the file.
    filename : str
        Name of the CSV file inside ``path``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` produces with its default options.
    """
    # os.path.join picks the platform separator, tolerates a trailing
    # separator on `path`, and accepts path-like objects, none of which the
    # previous manual '/' concatenation handled.
    return pd.read_csv(os.path.join(path,filename))
    
In [43]:
# Directory that holds the datasets (raw string, so backslashes need no escaping).
path = r'C:\Users\manje\Downloads\Projects\Covid-19'
In [ ]:
 
In [ ]:
 
In [30]:
# Show the directory listing again to look up the available file names.
files
Out[30]:
['.ipynb_checkpoints',
 'country_wise_latest.csv',
 'covid_19_clean_complete.csv',
 'Covid_19_project.ipynb',
 'day_wise.csv',
 'full_grouped.csv',
 'usa_country_wise.csv',
 'worldometer_data.csv']
In [39]:
# Load the Worldometer snapshot by passing the CSV file name explicitly.
world_data = read_data(path=path, filename='worldometer_data.csv')
world_data
Out[39]:
Country/Region Continent Population TotalCases NewCases TotalDeaths NewDeaths TotalRecovered NewRecovered ActiveCases Serious,Critical Tot Cases/1M pop Deaths/1M pop TotalTests Tests/1M pop WHO Region
0 USA North America 3.311981e+08 5032179 NaN 162804.0 NaN 2576668.0 NaN 2292707.0 18296.0 15194.0 492.0 63139605.0 190640.0 Americas
1 Brazil South America 2.127107e+08 2917562 NaN 98644.0 NaN 2047660.0 NaN 771258.0 8318.0 13716.0 464.0 13206188.0 62085.0 Americas
2 India Asia 1.381345e+09 2025409 NaN 41638.0 NaN 1377384.0 NaN 606387.0 8944.0 1466.0 30.0 22149351.0 16035.0 South-EastAsia
3 Russia Europe 1.459409e+08 871894 NaN 14606.0 NaN 676357.0 NaN 180931.0 2300.0 5974.0 100.0 29716907.0 203623.0 Europe
4 South Africa Africa 5.938157e+07 538184 NaN 9604.0 NaN 387316.0 NaN 141264.0 539.0 9063.0 162.0 3149807.0 53044.0 Africa
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
204 Montserrat North America 4.992000e+03 13 NaN 1.0 NaN 10.0 NaN 2.0 NaN 2604.0 200.0 61.0 12220.0 NaN
205 Caribbean Netherlands North America 2.624700e+04 13 NaN NaN NaN 7.0 NaN 6.0 NaN 495.0 NaN 424.0 16154.0 NaN
206 Falkland Islands South America 3.489000e+03 13 NaN NaN NaN 13.0 NaN 0.0 NaN 3726.0 NaN 1816.0 520493.0 NaN
207 Vatican City Europe 8.010000e+02 12 NaN NaN NaN 12.0 NaN 0.0 NaN 14981.0 NaN NaN NaN Europe
208 Western Sahara Africa 5.986820e+05 10 NaN 1.0 NaN 8.0 NaN 1.0 NaN 17.0 2.0 NaN NaN Africa

209 rows × 16 columns

In [38]:
# Load the per-country daily records. The file is named explicitly instead of
# being picked by position in `files`: os.listdir() order is arbitrary and the
# folder also contains non-data entries, so an index can silently point at the
# wrong file.
group_data=read_data(path,'full_grouped.csv')
group_data
Out[38]:
Date Country/Region Confirmed Deaths Recovered Active New cases New deaths New recovered WHO Region
0 2020-01-22 Afghanistan 0 0 0 0 0 0 0 Eastern Mediterranean
1 2020-01-22 Albania 0 0 0 0 0 0 0 Europe
2 2020-01-22 Algeria 0 0 0 0 0 0 0 Africa
3 2020-01-22 Andorra 0 0 0 0 0 0 0 Europe
4 2020-01-22 Angola 0 0 0 0 0 0 0 Africa
... ... ... ... ... ... ... ... ... ... ...
35151 2020-07-27 West Bank and Gaza 10621 78 3752 6791 152 2 0 Eastern Mediterranean
35152 2020-07-27 Western Sahara 10 1 8 1 0 0 0 Africa
35153 2020-07-27 Yemen 1691 483 833 375 10 4 36 Eastern Mediterranean
35154 2020-07-27 Zambia 4552 140 2815 1597 71 1 465 Africa
35155 2020-07-27 Zimbabwe 2704 36 542 2126 192 2 24 Africa

35156 rows × 10 columns

In [37]:
# Load the US records. The file is named explicitly instead of being picked by
# position in `files`: os.listdir() order is arbitrary and the folder also
# contains non-data entries, so an index can silently point at the wrong file.
usa_data=read_data(path,'usa_country_wise.csv')
usa_data
Out[37]:
UID iso2 iso3 code3 FIPS Admin2 Province_State Country_Region Lat Long_ Combined_Key Date Confirmed Deaths
0 16 AS ASM 16 60.0 NaN American Samoa US -14.271000 -170.132000 American Samoa, US 1/22/20 0 0
1 316 GU GUM 316 66.0 NaN Guam US 13.444300 144.793700 Guam, US 1/22/20 0 0
2 580 MP MNP 580 69.0 NaN Northern Mariana Islands US 15.097900 145.673900 Northern Mariana Islands, US 1/22/20 0 0
3 63072001 PR PRI 630 72001.0 Adjuntas Puerto Rico US 18.180117 -66.754367 Adjuntas, Puerto Rico, US 1/22/20 0 0
4 63072003 PR PRI 630 72003.0 Aguada Puerto Rico US 18.360255 -67.175131 Aguada, Puerto Rico, US 1/22/20 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
627915 84070016 US USA 840 NaN Central Utah Utah US 39.372319 -111.575868 Central Utah, Utah, US 7/27/20 347 1
627916 84070017 US USA 840 NaN Southeast Utah Utah US 38.996171 -110.701396 Southeast Utah, Utah, US 7/27/20 70 0
627917 84070018 US USA 840 NaN Southwest Utah Utah US 37.854472 -111.441876 Southwest Utah, Utah, US 7/27/20 2781 23
627918 84070019 US USA 840 NaN TriCounty Utah US 40.124915 -109.517442 TriCounty, Utah, US 7/27/20 142 0
627919 84070020 US USA 840 NaN Weber-Morgan Utah US 41.271160 -111.914512 Weber-Morgan, Utah, US 7/27/20 2375 24

627920 rows × 14 columns

In [36]:
# Load the latest per-country summary (one row per Country/Region, despite the
# variable name). The file is named explicitly instead of being picked by
# position in `files`: os.listdir() order is arbitrary and the folder also
# contains non-data entries, so an index can silently point at the wrong file.
province_data=read_data(path,'country_wise_latest.csv')
province_data
Out[36]:
Country/Region Confirmed Deaths Recovered Active New cases New deaths New recovered Deaths / 100 Cases Recovered / 100 Cases Deaths / 100 Recovered Confirmed last week 1 week change 1 week % increase WHO Region
0 Afghanistan 36263 1269 25198 9796 106 10 18 3.50 69.49 5.04 35526 737 2.07 Eastern Mediterranean
1 Albania 4880 144 2745 1991 117 6 63 2.95 56.25 5.25 4171 709 17.00 Europe
2 Algeria 27973 1163 18837 7973 616 8 749 4.16 67.34 6.17 23691 4282 18.07 Africa
3 Andorra 907 52 803 52 10 0 0 5.73 88.53 6.48 884 23 2.60 Europe
4 Angola 950 41 242 667 18 1 0 4.32 25.47 16.94 749 201 26.84 Africa
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
182 West Bank and Gaza 10621 78 3752 6791 152 2 0 0.73 35.33 2.08 8916 1705 19.12 Eastern Mediterranean
183 Western Sahara 10 1 8 1 0 0 0 10.00 80.00 12.50 10 0 0.00 Africa
184 Yemen 1691 483 833 375 10 4 36 28.56 49.26 57.98 1619 72 4.45 Eastern Mediterranean
185 Zambia 4552 140 2815 1597 71 1 465 3.08 61.84 4.97 3326 1226 36.86 Africa
186 Zimbabwe 2704 36 542 2126 192 2 24 1.33 20.04 6.64 1713 991 57.85 Africa

187 rows × 15 columns

In [35]:
# Load the worldwide day-by-day totals. The file is named explicitly instead
# of being picked by position in `files`: os.listdir() order is arbitrary and
# the folder also contains non-data entries, so an index can silently point at
# the wrong file.
day_wise=read_data(path,'day_wise.csv')
day_wise
Out[35]:
Date Confirmed Deaths Recovered Active New cases New deaths New recovered Deaths / 100 Cases Recovered / 100 Cases Deaths / 100 Recovered No. of countries
0 2020-01-22 555 17 28 510 0 0 0 3.06 5.05 60.71 6
1 2020-01-23 654 18 30 606 99 1 2 2.75 4.59 60.00 8
2 2020-01-24 941 26 36 879 287 8 6 2.76 3.83 72.22 9
3 2020-01-25 1434 42 39 1353 493 16 3 2.93 2.72 107.69 11
4 2020-01-26 2118 56 52 2010 684 14 13 2.64 2.46 107.69 13
... ... ... ... ... ... ... ... ... ... ... ... ...
183 2020-07-23 15510481 633506 8710969 6166006 282756 9966 169714 4.08 56.16 7.27 187
184 2020-07-24 15791645 639650 8939705 6212290 281164 6144 228736 4.05 56.61 7.16 187
185 2020-07-25 16047190 644517 9158743 6243930 255545 4867 219038 4.02 57.07 7.04 187
186 2020-07-26 16251796 648621 9293464 6309711 204606 4104 134721 3.99 57.18 6.98 187
187 2020-07-27 16480485 654036 9468087 6358362 228693 5415 174623 3.97 57.45 6.91 187

188 rows × 12 columns

Which country has the maximum total cases, deaths, recovered and active cases?¶

Let's create a treemap representation of our data¶

In [40]:
# List the column labels of world_data to pick the metrics to plot.
world_data.columns
Out[40]:
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
       'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
       'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
       'TotalTests', 'Tests/1M pop', 'WHO Region'],
      dtype='object')
In [41]:
# Draw one treemap per metric over the first 20 rows of world_data.
columns = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
for metric in columns:
    fig = px.treemap(
        world_data.iloc[:20],
        path=['Country/Region'],
        values=metric,
        template="plotly_dark",
        title=f"<b>TreeMap representation of different Countries w.r.t. their {metric}</b>",
    )
    fig.show()
In [ ]:
 

What is the trend of confirmed, deaths, recovered and active cases?¶

Line Plot¶

In [42]:
# Plot the four daily series from day_wise against the Date column.
trend_columns = ["Confirmed", "Deaths", "Recovered", "Active"]
fig = px.line(
    day_wise,
    x="Date",
    y=trend_columns,
    template="plotly_dark",
    title="covid cases w.r.t. date",
)
fig.show()
In [ ]:
 

Find the 10 most affected countries¶

BarPlot Representation of Population to Tests Done Ratio¶

In [44]:
# Population divided by tests performed, for the first 10 rows of world_data.
first_ten_rows = world_data.iloc[0:10]
pop_test_ratio = first_ten_rows['Population'].div(first_ten_rows['TotalTests'])
In [45]:
# Inspect the computed population-to-tests ratios.
pop_test_ratio
Out[45]:
0      5.245489
1     16.106896
2     62.365033
3      4.911040
4     18.852446
5    122.115932
6     13.241331
7     10.866949
8     28.269105
9      6.618696
dtype: float64
In [65]:
# Bar chart of the population-to-tests ratio, one coloured bar per country.
fig = px.bar(
    world_data.iloc[0:10],
    x='Country/Region',
    y=pop_test_ratio,
    color='Country/Region',
    template="plotly_dark",
    title="<b>Population to Tests done ratio's</b>",
)
fig.show()
In [ ]:
 

Top 10 countries that are badly affected by corona¶

Bar plot of coronavirus case counts by country¶

In [66]:
# Bar chart of five case metrics for the first 10 rows of world_data.
fig = px.bar(
    world_data.iloc[0:10],
    x='Country/Region',
    y=['Serious,Critical', 'TotalDeaths', 'TotalRecovered', 'ActiveCases', 'TotalCases'],
    template="plotly",
)
In [67]:
# The figure plots case counts per country (its x axis is 'Country/Region'),
# not a time series, so the title describes countries rather than time.
fig.update_layout({'title':"Coronavirus cases of the 10 worst-affected countries"})
fig.show()
In [ ]:
 
In [ ]:
 

Top 20 countries of Total Confirmed Cases, Total Recovered Cases, Total Deaths,Total Active Cases¶

In [68]:
# Preview the first five rows of world_data.
world_data.head()
Out[68]:
Country/Region Continent Population TotalCases NewCases TotalDeaths NewDeaths TotalRecovered NewRecovered ActiveCases Serious,Critical Tot Cases/1M pop Deaths/1M pop TotalTests Tests/1M pop WHO Region
0 USA North America 3.311981e+08 5032179 NaN 162804.0 NaN 2576668.0 NaN 2292707.0 18296.0 15194.0 492.0 63139605.0 190640.0 Americas
1 Brazil South America 2.127107e+08 2917562 NaN 98644.0 NaN 2047660.0 NaN 771258.0 8318.0 13716.0 464.0 13206188.0 62085.0 Americas
2 India Asia 1.381345e+09 2025409 NaN 41638.0 NaN 1377384.0 NaN 606387.0 8944.0 1466.0 30.0 22149351.0 16035.0 South-EastAsia
3 Russia Europe 1.459409e+08 871894 NaN 14606.0 NaN 676357.0 NaN 180931.0 2300.0 5974.0 100.0 29716907.0 203623.0 Europe
4 South Africa Africa 5.938157e+07 538184 NaN 9604.0 NaN 387316.0 NaN 141264.0 539.0 9063.0 162.0 3149807.0 53044.0 Africa
In [69]:
# Count the distinct values in the 'Country/Region' column.
world_data['Country/Region'].nunique()
Out[69]:
209
In [ ]:
 

Top 20 countries of Total Confirmed cases¶

In [70]:
# Sort explicitly by TotalCases before taking the top 20, as the deaths,
# active and recovered charts do, instead of relying on the CSV row order.
fig=px.bar(world_data.sort_values(by='TotalCases',ascending=False)[0:20],y='Country/Region',x='TotalCases',color='TotalCases',text="TotalCases")
fig.update_layout(template="plotly_dark",title_text="<b>Top 20 countries of Total confirmed cases</b>")
fig.show()
In [ ]:
 
In [ ]:
 

Top 20 countries of Total deaths¶

In [71]:
# Rank the rows by TotalDeaths (largest first) and chart the first 20.
deaths_ranked = world_data.sort_values(by='TotalDeaths', ascending=False)
fig = px.bar(
    deaths_ranked.head(20),
    x='TotalDeaths',
    y='Country/Region',
    color='TotalDeaths',
    text="TotalDeaths",
)
fig.update_layout(title_text="<b>Top 20 countries of Total deaths</b>", template="plotly_dark")
fig.show()
In [ ]:
 

Top 20 countries of Total active cases¶

In [72]:
# Rank the rows by ActiveCases (largest first) and chart the first 20.
fig=px.bar(world_data.sort_values(by='ActiveCases',ascending=False)[0:20], y='Country/Region',x='ActiveCases',color='ActiveCases',text='ActiveCases')
# The bold tag is now closed, matching the other "Top 20" chart titles.
fig.update_layout(template="plotly_dark",title_text="<b>Top 20 countries of Total Active cases</b>")
fig.show()
In [ ]:
 
In [ ]:
 

Top 20 countries of Total Recoveries¶

In [73]:
# Rank the rows by TotalRecovered (largest first) and chart the first 20.
fig=px.bar(world_data.sort_values(by='TotalRecovered',ascending=False)[:20],y='Country/Region',x='TotalRecovered',color='TotalRecovered',text='TotalRecovered')
# The bold tag is now closed, matching the other "Top 20" chart titles.
fig.update_layout(template="plotly_dark",title_text="<b>Top 20 countries of Total Recovered</b>")
fig.show()
In [ ]:
 
In [74]:
# List the column labels of world_data again for reference.
world_data.columns
Out[74]:
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
       'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
       'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
       'TotalTests', 'Tests/1M pop', 'WHO Region'],
      dtype='object')
In [75]:
# 'Country/Region' values of the first 15 rows of world_data, as a NumPy array.
world_data[0:15]['Country/Region'].values
Out[75]:
array(['USA', 'Brazil', 'India', 'Russia', 'South Africa', 'Mexico',
       'Peru', 'Chile', 'Colombia', 'Spain', 'Iran', 'UK', 'Saudi Arabia',
       'Pakistan', 'Bangladesh'], dtype=object)
In [ ]:
 

Pie Chart Representation of stats of worst affected countries¶

Pie Charts in Donut Shape¶

In [77]:
# One donut chart (hole=0.3) per metric over the first 15 rows of world_data;
# each slice is a country.
labels=world_data[0:15]['Country/Region'].values
cases=['TotalCases','TotalDeaths','TotalRecovered','ActiveCases']
for i in cases:
    # The title now says what the slices are (countries, not WHO regions) and
    # drops the "Recordeded" / "w.r.t. to" typos.
    fig=px.pie(world_data[0:15],values=i,names=labels,template="plotly_dark",hole=0.3,title="{} recorded in the 15 worst affected countries".format(i))
    fig.show()
In [ ]:
 
In [ ]:
 

Deaths to Confirmed ratio¶

In [82]:
# Ratio of total deaths to total confirmed cases for every row of world_data.
# NOTE(review): the title says "some worst effected countries" but all rows
# are plotted - confirm which one is intended.
deaths_to_confirmed = world_data['TotalDeaths'].div(world_data['TotalCases'])
fig = px.bar(world_data, y=deaths_to_confirmed, x='Country/Region')
fig.update_layout(
    template="plotly_dark",
    title=dict(text="Death to confirmed ratio of some  worst effected countries", xanchor='left'),
)
fig.show()
In [ ]:
 
In [ ]:
 

Deaths to recovered ratio¶

In [83]:
# Ratio of total deaths to total recovered for every row of world_data.
# NOTE(review): the title says "some worst effected countries" but all rows
# are plotted - confirm which one is intended.
deaths_to_recovered = world_data['TotalDeaths'].div(world_data['TotalRecovered'])
fig = px.bar(world_data, y=deaths_to_recovered, x='Country/Region')
fig.update_layout(
    template="plotly_dark",
    title=dict(text="Death to recovered ratio of some  worst effected countries", xanchor='left'),
)
fig.show()
In [ ]:
 
In [ ]:
 

Tests to Confirmed Ratio¶

In [84]:
# Ratio of total tests to total confirmed cases for every row of world_data.
# NOTE(review): the title says "some worst effected countries" but all rows
# are plotted - confirm which one is intended.
tests_to_confirmed = world_data['TotalTests'].div(world_data['TotalCases'])
fig = px.bar(world_data, y=tests_to_confirmed, x='Country/Region')
fig.update_layout(
    template="plotly_dark",
    title=dict(text="Tests to confirmed ratio of some  worst effected countries", xanchor='left'),
)
fig.show()
In [ ]:
 

Serious to Deaths Ratio¶

In [85]:
# Ratio of serious/critical cases to total deaths for every row of world_data.
# NOTE(review): the title says "some worst effected countries" but all rows
# are plotted - confirm which one is intended.
serious_to_death = world_data['Serious,Critical'].div(world_data['TotalDeaths'])
fig = px.bar(world_data, y=serious_to_death, x='Country/Region')
fig.update_layout(
    template="plotly_dark",
    title=dict(text="serious to Death ratio of some  worst effected countries", xanchor='left'),
)
fig.show()
In [ ]:
 

Visualize confirmed, active, recovered and deaths cases (entire statistics) of a particular country¶

In [86]:
# Preview the first five rows of group_data.
group_data.head()
Out[86]:
Date Country/Region Confirmed Deaths Recovered Active New cases New deaths New recovered WHO Region
0 2020-01-22 Afghanistan 0 0 0 0 0 0 0 Eastern Mediterranean
1 2020-01-22 Albania 0 0 0 0 0 0 0 Europe
2 2020-01-22 Algeria 0 0 0 0 0 0 0 Africa
3 2020-01-22 Andorra 0 0 0 0 0 0 0 Europe
4 2020-01-22 Angola 0 0 0 0 0 0 0 Africa
In [87]:
from plotly.subplots import make_subplots  ## for creating subplots in plotly
import plotly.graph_objects as go
In [91]:
def country_visualization(group_data,country):
    """Show the Confirmed, Active, Recovered and Deaths series of one country.

    Parameters
    ----------
    group_data : pandas.DataFrame
        Frame with 'Country/Region', 'Date', 'Confirmed', 'Deaths',
        'Recovered' and 'Active' columns.
    country : str
        Value compared for exact equality against 'Country/Region'.

    The figure is a single row of four subplots, one Scatter trace each,
    with 'Date' on the x axis. It is displayed with ``fig.show()``; nothing
    is returned.
    """
    # Panel order doubles as subplot title, trace name and y column.
    panels = ("Confirmed", "Active", "Recovered", 'Deaths')

    selected = group_data[group_data['Country/Region'] == country]
    df = selected.loc[:, ['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active']]

    fig = make_subplots(rows=1, cols=4, subplot_titles=panels)
    for column_number, metric in enumerate(panels, start=1):
        fig.add_trace(
            go.Scatter(name=metric, x=df['Date'], y=df[metric]),
            row=1, col=column_number
        )

    fig.update_layout(
        title_text="Date Vs Recorded Cases of {}".format(country),
        template="plotly_dark",
        height=500,
        width=1000,
    )
    fig.show()
In [92]:
# Plot the four case series for Brazil.
country_visualization(group_data,'Brazil')
In [93]:
# Plot the four case series for the United States.
# NOTE(review): this passes 'US', whereas world_data labels the country 'USA';
# presumably group_data uses 'US' - verify against the file.
country_visualization(group_data,'US')
In [94]:
# Plot the four case series for India.
country_visualization(group_data, "India")
In [ ]: